# Strain/clone lookup table; columns are renamed so that `strain` can serve
# as the join key against the presence/absence table below.
key1 <- read.table(
  file = "./Data/Strain.clone.info.tsv",
  sep = "\t",
  header = TRUE
) %>%
  rename(parent_ST = ID, new_ST = Strain, strain = Hartwell)

# Gene presence/absence table.
g1 <- read.table(
  file = "./Data/Group1 gene.presence_absence.tsv",
  sep = "\t",
  header = TRUE
)

# Expose the row names of g1 as a `strain` column, attach the key, discard
# the helper columns, and build a combined "<new_ST>_<Clone>" label.
g2 <- mutate(g1, strain = rownames(g1)) %>%
  left_join(key1, by = "strain") %>%
  select(-c(strain, parent_ST)) %>%
  mutate(ID2 = paste(new_ST, Clone, sep = "_"))

# Sanity check: keep only the non-duplicated rows of g1 and compare row counts.
test <- g1[!duplicated(g1), , drop = FALSE]
identical(nrow(test), nrow(g1)) # TRUE means g1 contained no duplicated rows
## [1] TRUE
# Long-format copy of g2: one row per (isolate, variable) pair.
# Variables whose name contains "group" are flagged and then filtered out.
g2.m <- g2 %>%
  mutate(ID2 = paste(new_ST, Clone, sep = "_")) %>%
  reshape2::melt(id.vars = c("new_ST", "Clone", "ID2")) %>%
  arrange(ID2, Clone) %>%
  mutate(
    value = as.factor(value),
    group = grepl("group", variable)
  ) %>%
  filter(!group)

# Simple tile heatmap: variables on x, isolates (ID2) on y, no clustering.
ggplot(g2.m, aes(x = variable, y = ID2, fill = value)) +
  geom_tile()

# Focus on genes that have a clear annotation.

# Build a matrix with one row per isolate (row names = "<new_ST>_<Clone>")
# and one column per remaining variable, then draw a clustered heatmap.
g2a <- g2 %>%
  mutate(ID2 = paste(new_ST, Clone, sep = "_"))
row.names(g2a) <- g2a$ID2

mat1 <- g2a %>%
  dplyr::select(-new_ST, -Clone, -ID2) %>%
  as.matrix()

# Indices of columns whose name contains "group".
groupvar <- grep("group", colnames(mat1))

# Only drop columns when something matched: `mat1[, -integer(0)]` would
# select zero columns and silently empty the matrix. `drop = FALSE` keeps
# the result a matrix even if a single column survives.
if (length(groupvar) > 0) {
  mat1 <- mat1[, -groupvar, drop = FALSE]
}

library(pheatmap)
p2 <- pheatmap(mat1)
p2

# Note: the heatmaply plot below takes a really long time to render.

# heatmaply version of the mat1 heatmap: no scaling, empty titles and axis
# labels, hidden colour bar, and row/column labels taken from the matrix
# dimnames.
p <- heatmaply(
  mat1,
  # dendrogram = "row",
  scale = "none",
  main = "",
  xlab = "",
  ylab = "",
  labRow = rownames(mat1),
  labCol = colnames(mat1),
  label_names = c("Serotype/isolate", "Feature:", "Value"),
  fontsize_row = 5,
  fontsize_col = 5,
  margins = c(60, 100, 40, 20),
  grid_gap = 0,
  branches_lwd = 0.1,
  titleX = FALSE,
  hide_colorbar = TRUE,
  heatmap_layers = theme(axis.line = element_blank())
)
p